{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Simple embedding and sentiment analysis\n",
    "Inspired by: https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.0.0-alpha0\n",
      "2.2.4-tf\n",
      "[[12, 1], [41, 18], [39, 41], [6, 18], [49], [48], [30], [42, 41], [32, 41], [42, 18], [1, 13, 1, 29], [34, 7]]\n",
      "[[12  1  0  0]\n",
      " [41 18  0  0]\n",
      " [39 41  0  0]\n",
      " [ 6 18  0  0]\n",
      " [49  0  0  0]\n",
      " [48  0  0  0]\n",
      " [30  0  0  0]\n",
      " [42 41  0  0]\n",
      " [32 41  0  0]\n",
      " [42 18  0  0]\n",
      " [ 1 13  1 29]\n",
      " [34  7  0  0]]\n"
     ]
    }
   ],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "# for tf < version2, delete \"tensorflow.\"\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Activation, Dense\n",
    "from tensorflow.keras.layers import Flatten\n",
    "from tensorflow.keras.utils import plot_model\n",
    "from tensorflow.keras.layers import Embedding\n",
    "from tensorflow.keras.preprocessing.text import one_hot\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "\n",
    "print(tf.__version__)\n",
    "print(tf.keras.__version__)\n",
    "\n",
    "docs = ['Well done!',\n",
    "        'Good work',\n",
    "        'Great effort',\n",
    "        'Nice work',\n",
    "        'Excellent!',\n",
    "        'Wow!',\n",
    "        'Weak',\n",
    "        'Poor effort!',\n",
    "        'Not good',\n",
    "        'Poor work',\n",
    "        'Could have done better.',\n",
    "        'Very bad!']\n",
    "# define class labels\n",
    "labels = np.array([1,1,1,1,1,1,0,0,0,0,0,0])\n",
    "# integer encode the documents\n",
    "vocab_size = 50\n",
    "encoded_docs = [one_hot(d, vocab_size) for d in docs]\n",
    "print(encoded_docs)\n",
    "# pad documents to a max length of 4 words\n",
    "max_length = 4\n",
    "padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')\n",
    "print(padded_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_2\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "embedding_1 (Embedding)      (None, 4, 8)              400       \n",
      "_________________________________________________________________\n",
      "flatten_1 (Flatten)          (None, 32)                0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 1)                 33        \n",
      "=================================================================\n",
      "Total params: 433\n",
      "Trainable params: 433\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "# define the model\n",
    "model = Sequential()\n",
    "model.add(Embedding(vocab_size, 8, input_length=max_length))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "# compile the model\n",
    "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])\n",
    "# summarize the model\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 100.000000\n"
     ]
    }
   ],
   "source": [
    "# fit the model\n",
    "model.fit(padded_docs, labels, epochs=200, verbose=0)\n",
    "# evaluate the model\n",
    "loss, accuracy = model.evaluate(padded_docs, labels, verbose=0)\n",
    "print('Accuracy: %f' % (accuracy*100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[39, 14]]\n"
     ]
    }
   ],
   "source": [
    "test = ['Great job!']\n",
    "encoded_test = [one_hot(d, vocab_size) for d in test]\n",
    "print(encoded_test)\n",
    "padded_test = pad_sequences(encoded_test, maxlen=max_length, padding='post')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.823561]], dtype=float32)"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(padded_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[8, 13, 7]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[0.29199824]], dtype=float32)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = ['This is bad!']\n",
    "encoded_test = [one_hot(d, vocab_size) for d in test]\n",
    "print(encoded_test)\n",
    "padded_test = pad_sequences(encoded_test, maxlen=max_length, padding='post')\n",
    "model.predict(padded_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
